import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Four random walks: cumulative sums of standard-normal steps, one walk per
# column, sampled on 100 evenly spaced points between 0 and 20.
x = np.linspace(0, 20, 100)
y = np.random.randn(100, 4).cumsum(axis=0)
# Draw every column of y against x using matplotlib's default look.
plt.plot(x, y)
[<matplotlib.lines.Line2D at 0x29ee55ac340>, <matplotlib.lines.Line2D at 0x29ee55ac3a0>, <matplotlib.lines.Line2D at 0x29ee55ac4c0>, <matplotlib.lines.Line2D at 0x29ee55ac5e0>]
sns.set()  # switch subsequent matplotlib plots to seaborn's default theme
# here we can change the parameters
plt.plot(x, y)
[<matplotlib.lines.Line2D at 0x29ee76bb3d0>, <matplotlib.lines.Line2D at 0x29ee76bb430>, <matplotlib.lines.Line2D at 0x29ee76bb550>, <matplotlib.lines.Line2D at 0x29ee76bb670>]
# Load seaborn's bundled "tips" example dataset and preview the first rows.
df = sns.load_dataset("tips")
df.head()
| | total_bill | tip | sex | smoker | day | time | size |
|---|---|---|---|---|---|---|---|
| 0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
| 1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
| 2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
| 3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
| 4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()
| | total_bill | tip | size |
|---|---|---|---|
| count | 244.000000 | 244.000000 | 244.000000 |
| mean | 19.785943 | 2.998279 | 2.569672 |
| std | 8.902412 | 1.383638 | 0.951100 |
| min | 3.070000 | 1.000000 | 1.000000 |
| 25% | 13.347500 | 2.000000 | 2.000000 |
| 50% | 17.795000 | 2.900000 | 2.000000 |
| 75% | 24.127500 | 3.562500 | 3.000000 |
| max | 50.810000 | 10.000000 | 6.000000 |
# Summary (count, unique, top, freq) restricted to the categorical columns.
df.describe(include=["category"])
| | sex | smoker | day | time |
|---|---|---|---|---|
| count | 244 | 244 | 244 | 244 |
| unique | 2 | 2 | 4 | 2 |
| top | Male | No | Sat | Dinner |
| freq | 157 | 151 | 87 | 176 |
# Scatter plot of tip against total bill (scatter is relplot's default kind).
sns.relplot(data=df, x="total_bill", y="tip")
<seaborn.axisgrid.FacetGrid at 0x29ee55e91b0>
# The same scatter drawn through the pandas plotting API, for comparison.
df.plot(kind="scatter", x="total_bill", y="tip")
*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with *x* & *y*. Please use the *color* keyword-argument or provide a 2D array with a single row if you intend to specify the same RGB or RGBA value for all points.
<AxesSubplot:xlabel='total_bill', ylabel='tip'>
# Reset the theme with fonts scaled by 1.2, then colour the points by day.
sns.set(font_scale=1.2)
sns.relplot(data=df, x="total_bill", y="tip", hue="day")
<seaborn.axisgrid.FacetGrid at 0x29ee7873310>
# Same plot, with the hue levels drawn from the "viridis" palette.
sns.relplot(
    data=df,
    x="total_bill",
    y="tip",
    hue="day",
    palette="viridis",
)
<seaborn.axisgrid.FacetGrid at 0x29ee78fc430>
# Facet into one column of panels per value of `time`.
sns.relplot(
    data=df,
    x="total_bill",
    y="tip",
    hue="day",
    palette="viridis",
    col="time",
)
<seaborn.axisgrid.FacetGrid at 0x29ee77d4f70>
# Facet into a grid: columns by `time`, rows by `smoker`.
sns.relplot(
    data=df,
    x="total_bill",
    y="tip",
    hue="day",
    palette="viridis",
    col="time",
    row="smoker",
)
<seaborn.axisgrid.FacetGrid at 0x29ee7a9c8b0>
# Load seaborn's bundled "fmri" example dataset and show its first five rows.
df = sns.load_dataset("fmri")
df.head(5)
| | subject | timepoint | event | region | signal |
|---|---|---|---|---|---|
| 0 | s13 | 18 | stim | parietal | -0.017552 |
| 1 | s5 | 14 | stim | parietal | -0.080883 |
| 2 | s12 | 18 | stim | parietal | -0.081033 |
| 3 | s11 | 18 | stim | parietal | -0.046134 |
| 4 | s10 | 18 | stim | parietal | -0.037970 |
# Raw scatter of every signal measurement against its timepoint.
sns.relplot(data=df, x="timepoint", y="signal")
<seaborn.axisgrid.FacetGrid at 0x29ee7beefb0>
# Line plot of signal over timepoint with the confidence band switched off.
# NOTE(review): seaborn >= 0.12 deprecates `ci` in favour of `errorbar=None`;
# switch once older seaborn releases no longer need to be supported.
sns.relplot(data=df, x="timepoint", y="signal", kind="line", ci=None)
<seaborn.axisgrid.FacetGrid at 0x29ee7bef160>
# Number of observations recorded at each timepoint.
df["timepoint"].value_counts()
18    56
10    56
4     56
5     56
6     56
2     56
8     56
7     56
3     56
11    56
14    56
12    56
13    56
0     56
15    56
16    56
9     56
17    56
1     56
Name: timepoint, dtype: int64
# Mean signal per timepoint, drawn with the pandas plotting API.
# The numeric `signal` column is selected explicitly: the frame also holds
# string columns (subject, event, region), and pandas >= 2.0 raises TypeError
# when asked to average them instead of silently dropping them.
df.groupby("timepoint")[["signal"]].mean().plot()
<AxesSubplot:xlabel='timepoint'>
# One panel per subject, wrapped after four columns; `event` controls both
# the colour and the marker style.
sns.relplot(
    data=df,
    x="timepoint",
    y="signal",
    hue="event",
    style="event",
    col="subject",
    col_wrap=4,
)
<seaborn.axisgrid.FacetGrid at 0x29ee8fff8e0>
# Switch back to the "tips" dataset for the categorical plots.
df = sns.load_dataset("tips")
df.head()
| | total_bill | tip | sex | smoker | day | time | size |
|---|---|---|---|---|---|---|---|
| 0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
| 1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
| 2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
| 3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
| 4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
# Use the "ticks" style with the "pastel" palette, then plot bills per day.
sns.set(style="ticks", palette="pastel")
sns.catplot(data=df, x="day", y="total_bill")
<seaborn.axisgrid.FacetGrid at 0x29ee8fff550>
# The same comparison drawn as box plots.
sns.catplot(data=df, x="day", y="total_bill", kind="box")
<seaborn.axisgrid.FacetGrid at 0x29eea078370>
# Load the "titanic" example dataset and draw bar plots of `survived`
# by sex, split by `class`.
df = sns.load_dataset("titanic")
sns.catplot(data=df, x="sex", y="survived", kind="bar", hue="class")
<seaborn.axisgrid.FacetGrid at 0x29eea9444f0>
# Count of rows per deck, with decks along the x axis.
sns.catplot(data=df, x="deck", kind="count")
<seaborn.axisgrid.FacetGrid at 0x29eea9442e0>
# Same counts with decks along the y axis (horizontal bars).
sns.catplot(data=df, y="deck", kind="count")
<seaborn.axisgrid.FacetGrid at 0x29eea11f340>
# Horizontal count plot again, coloured with the "Blues" palette.
sns.catplot(data=df, y="deck", kind="count", palette="Blues")
<seaborn.axisgrid.FacetGrid at 0x29eea9855d0>
# 200 draws from a bivariate normal with means (0, 1), unit variances and a
# covariance of -0.4, wrapped in a DataFrame with columns x and y.
df = pd.DataFrame(
    np.random.multivariate_normal(
        mean=[0, 1],
        cov=[(1, -0.4), (-0.4, 1)],
        size=200,
    ),
    columns=["x", "y"],
)
df.describe()
| | x | y |
|---|---|---|
| count | 200.000000 | 200.000000 |
| mean | -0.052478 | 0.976274 |
| std | 1.004343 | 1.074738 |
| min | -2.882450 | -2.513433 |
| 25% | -0.700322 | 0.335126 |
| 50% | -0.018109 | 1.010378 |
| 75% | 0.663557 | 1.706312 |
| max | 2.870270 | 3.488807 |
# distribution plot: joint view of x and y together with their marginals
sns.jointplot(data=df, x="x", y="y")
<seaborn.axisgrid.JointGrid at 0x29eea1f6680>
# pairwise relationship plot
df = sns.load_dataset("iris")
sns.pairplot(data=df)
<seaborn.axisgrid.PairGrid at 0x29eea985630>
# Pairwise plots again, coloured by species.
sns.pairplot(data=df, hue="species")
<seaborn.axisgrid.PairGrid at 0x29eea170eb0>
# Scatter of tip against total bill with a fitted regression line.
df = sns.load_dataset("tips")
sns.regplot(data=df, x="total_bill", y="tip")
<AxesSubplot:xlabel='total_bill', ylabel='tip'>
# Load the "flights" example dataset (one row per year/month) and display it.
df = sns.load_dataset("flights")
df
| | year | month | passengers |
|---|---|---|---|
| 0 | 1949 | Jan | 112 |
| 1 | 1949 | Feb | 118 |
| 2 | 1949 | Mar | 132 |
| 3 | 1949 | Apr | 129 |
| 4 | 1949 | May | 121 |
| ... | ... | ... | ... |
| 139 | 1960 | Aug | 606 |
| 140 | 1960 | Sep | 508 |
| 141 | 1960 | Oct | 461 |
| 142 | 1960 | Nov | 390 |
| 143 | 1960 | Dec | 432 |
144 rows × 3 columns
# Reshape from long to wide: months as rows, years as columns, passenger
# counts as the cell values.
df = df.pivot(index="month", columns="year", values="passengers")
# heat map
sns.heatmap(df)
<AxesSubplot:xlabel='year', ylabel='month'>
# Same heat map with every cell annotated by its value, formatted as an integer.
sns.heatmap(df, annot=True, fmt="d")
<AxesSubplot:xlabel='year', ylabel='month'>
# Reload the "fmri" example dataset and display it.
df = sns.load_dataset("fmri")
df
| | subject | timepoint | event | region | signal |
|---|---|---|---|---|---|
| 0 | s13 | 18 | stim | parietal | -0.017552 |
| 1 | s5 | 14 | stim | parietal | -0.080883 |
| 2 | s12 | 18 | stim | parietal | -0.081033 |
| 3 | s11 | 18 | stim | parietal | -0.046134 |
| 4 | s10 | 18 | stim | parietal | -0.037970 |
| ... | ... | ... | ... | ... | ... |
| 1059 | s0 | 8 | cue | frontal | 0.018165 |
| 1060 | s13 | 7 | cue | frontal | -0.029130 |
| 1061 | s12 | 7 | cue | frontal | -0.004939 |
| 1062 | s11 | 7 | cue | frontal | -0.025367 |
| 1063 | s0 | 0 | cue | parietal | -0.006899 |
1064 rows × 5 columns
# Back to the "tips" dataset for a recap of the plot types used so far.
df = sns.load_dataset("tips")
df.head()
| | total_bill | tip | sex | smoker | day | time | size |
|---|---|---|---|---|---|---|---|
| 0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
| 1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
| 2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
| 3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
| 4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
# Tip against total bill, coloured by sex.
sns.relplot(data=df, x="total_bill", y="tip", hue="sex")
<seaborn.axisgrid.FacetGrid at 0x29eef2ae890>
# Box plots of the tip for each value of `time`.
sns.catplot(data=df, x="time", y="tip", kind="box")
<seaborn.axisgrid.FacetGrid at 0x29eef293460>
# Joint view of total bill and tip together with their marginal distributions.
sns.jointplot(data=df, x="total_bill", y="tip")
<seaborn.axisgrid.JointGrid at 0x29ef0607370>
# Pairwise plots of the numeric columns, coloured by `time`.
sns.pairplot(data=df, hue="time")
<seaborn.axisgrid.PairGrid at 0x29ef07ca8f0>
# Scatter of tip against total bill with a fitted regression line.
sns.regplot(data=df, x="total_bill", y="tip")
<AxesSubplot:xlabel='total_bill', ylabel='tip'>
# Pie chart of how many rows fall into each value of `time`.
df["time"].value_counts().plot.pie()
<AxesSubplot:ylabel='time'>
# Histogram of the tip amounts via the pandas plotting API.
df["tip"].plot.hist()
<AxesSubplot:ylabel='Frequency'>
import plotly.express as px
# Load the iris sample data bundled with plotly express and display it.
data = px.data.iris()
data
| | sepal_length | sepal_width | petal_length | petal_width | species | species_id |
|---|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa | 1 |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa | 1 |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa | 1 |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa | 1 |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa | 1 |
| ... | ... | ... | ... | ... | ... | ... |
| 145 | 6.7 | 3.0 | 5.2 | 2.3 | virginica | 3 |
| 146 | 6.3 | 2.5 | 5.0 | 1.9 | virginica | 3 |
| 147 | 6.5 | 3.0 | 5.2 | 2.0 | virginica | 3 |
| 148 | 6.2 | 3.4 | 5.4 | 2.3 | virginica | 3 |
| 149 | 5.9 | 3.0 | 5.1 | 1.8 | virginica | 3 |
150 rows × 6 columns
# Interactive scatter of sepal width against sepal length, coloured by species.
px.scatter(
    data,
    x="sepal_length",
    y="sepal_width",
    width=500,
    height=400,
    color="species",
)
# Same scatter with an ordinary-least-squares trendline added.
px.scatter(
    data,
    x="sepal_length",
    y="sepal_width",
    width=500,
    height=400,
    color="species",
    trendline="ols",
)
# Histogram of sepal length.
px.histogram(data, x="sepal_length")
# Fresh copy of the "tips" dataset for the binning example below.
df = sns.load_dataset("tips")
df
| | total_bill | tip | sex | smoker | day | time | size |
|---|---|---|---|---|---|---|---|
| 0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
| 1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
| 2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
| 3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
| 4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 239 | 29.03 | 5.92 | Male | No | Sat | Dinner | 3 |
| 240 | 27.18 | 2.00 | Female | Yes | Sat | Dinner | 2 |
| 241 | 22.67 | 2.00 | Male | Yes | Sat | Dinner | 2 |
| 242 | 17.82 | 1.75 | Male | No | Sat | Dinner | 2 |
| 243 | 18.78 | 3.00 | Female | No | Thur | Dinner | 2 |
244 rows × 7 columns
# Discretise the bill into two equal-width bins labelled 'malo' / 'duzo',
# overwriting the original numeric column.
df["total_bill"] = pd.cut(df["total_bill"], bins=2, labels=["malo", "duzo"])
df
| | total_bill | tip | sex | smoker | day | time | size |
|---|---|---|---|---|---|---|---|
| 0 | malo | 1.01 | Female | No | Sun | Dinner | 2 |
| 1 | malo | 1.66 | Male | No | Sun | Dinner | 3 |
| 2 | malo | 3.50 | Male | No | Sun | Dinner | 3 |
| 3 | malo | 3.31 | Male | No | Sun | Dinner | 2 |
| 4 | malo | 3.61 | Female | No | Sun | Dinner | 4 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 239 | duzo | 5.92 | Male | No | Sat | Dinner | 3 |
| 240 | duzo | 2.00 | Female | Yes | Sat | Dinner | 2 |
| 241 | malo | 2.00 | Male | Yes | Sat | Dinner | 2 |
| 242 | malo | 1.75 | Male | No | Sat | Dinner | 2 |
| 243 | malo | 3.00 | Female | No | Thur | Dinner | 2 |
244 rows × 7 columns
# Box plots of the tip for each of the two bill bins.
sns.catplot(data=df, x="total_bill", y="tip", kind="box")
<seaborn.axisgrid.FacetGrid at 0x29ef3b60b20>
df = sns.load_dataset("tips")
# Average tip for every day/time combination.  `day` and `time` are
# categorical, so observed=False is passed explicitly to keep combinations
# that never occur (e.g. Sat/Lunch) as NaN rows.  pandas is changing the
# default to observed=True, which would silently drop those rows.
baza = df.groupby(["day", "time"], observed=False)["tip"].mean().reset_index()
baza
| | day | time | tip |
|---|---|---|---|
| 0 | Thur | Lunch | 2.767705 |
| 1 | Thur | Dinner | 3.000000 |
| 2 | Fri | Lunch | 2.382857 |
| 3 | Fri | Dinner | 2.940000 |
| 4 | Sat | Lunch | NaN |
| 5 | Sat | Dinner | 2.993103 |
| 6 | Sun | Lunch | NaN |
| 7 | Sun | Dinner | 3.255132 |
from sklearn.impute import SimpleImputer

# Fill the missing per-group averages with the mean of the available ones.
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
baza["tip"] = imputer.fit_transform(baza[["tip"]])
# DataFrame.pivot is keyword-only since pandas 2.0; passing index/columns
# positionally raises TypeError there, so they are named explicitly.
sns.heatmap(baza.pivot(index="day", columns="time", values="tip"))
<AxesSubplot:xlabel='time', ylabel='day'>